\({\color{darkblue}{\textbf{Factors}}}\)


  • Making strings into categorical variables

Basic String

example_strings <- c("medium", "slow", "slow", "medium", "fast")
class(example_strings)
## [1] "character"

Categorical Factor

example_factor <- factor(example_strings)
class(example_factor)
## [1] "factor"
## [1] medium slow   slow   medium fast  
## Levels: fast medium slow

Ordinal Factor

example_ordered_factor <- factor(example_strings, ordered = TRUE, levels = c("slow","medium","fast"))
class(example_ordered_factor)
## [1] "ordered" "factor"
## [1] medium slow   slow   medium fast  
## Levels: slow < medium < fast

\({\color{darkblue}{\textbf{Vectors}}}\)


  • One dimensional array
  • The elements in a vector must all have the same data type

\({\color{dodgerblue}{\textbf{Creating}}}\)

Basic Vector

eg_vector <- c(1, 2, 3)
eg_vector
## [1] 1 2 3

Named Vector

names(eg_vector) <- c("item1", "item2", "item3")
eg_vector
## item1 item2 item3 
##     1     2     3

\({\color{dodgerblue}{\textbf{Operations}}}\)

\({\color{skyblue}{\textrm{- Adding Vectors}}}\)

Within

sum(eg_vector)
## [1] 6

Between

c(1, 2, 3) + c(4, 5, 6) =

c((1 + 4), (2 + 5), (3 + 6)) =

c(5, 7, 9)

A_vector <- c(1, 2, 3)
B_vector <- c(4, 5, 6)
A_vector + B_vector
## [1] 5 7 9

\({\color{skyblue}{\textrm{- Evaluating}}}\)

  • < = less than
  • > = greater than
  • <= = less than or equal to
  • >= = greater than or equal to
  • == = equal to each other
  • ! = not (e.g. !=)
  • & = and
  • | = or

Testing

A_vector > 2
## [1] FALSE FALSE  TRUE

Extracting True Values

A_vector[A_vector > 2]
## [1] 3

\({\color{dodgerblue}{\textbf{Indexing}}}\)

In R, indexing starts at 1

Extract a specific item

eg_vector[1]
## item1 
##     1

Extract a set of specific items

eg_vector[c(1,3)]
## item1 item3 
##     1     3

Extract a range of items

eg_vector[1:3]
## item1 item2 item3 
##     1     2     3

Extract a specific named item

eg_vector["item1"]
## item1 
##     1

\({\color{dodgerblue}{\textbf{Editing}}}\)

eg_vector["item1"] <- 100

eg_vector
## item1 item2 item3 
##   100     2     3

\({\color{darkblue}{\textbf{Matrices}}}\)


  • Two dimensional array
  • The elements in a matrix must all have the same data type

\({\color{dodgerblue}{\textbf{Creating}}}\)

Filling by Row

matrix(1:9,byrow=TRUE,nrow=3)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

Filling by Column

matrix(1:9,byrow=FALSE,nrow=3)
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

Naming

eg_matrix <- matrix(1:9,byrow=TRUE,nrow=3)

rownames(eg_matrix) <- c("MA", "NY", "CO")
colnames(eg_matrix) <- c("2010", "2011", "2013")

eg_matrix
##    2010 2011 2013
## MA    1    2    3
## NY    4    5    6
## CO    7    8    9

\({\color{dodgerblue}{\textbf{Operations}}}\)

Row Sums

rowSums(eg_matrix)
## MA NY CO 
##  6 15 24

Column Sums

colSums(eg_matrix)
## 2010 2011 2013 
##   12   15   18

Adding Matrices

matrix_a <- matrix(1:9,byrow=TRUE,nrow=3)

matrix_b <- matrix(1:9,byrow=FALSE,nrow=3)

matrix_a + matrix_b
##      [,1] [,2] [,3]
## [1,]    2    6   10
## [2,]    6   10   14
## [3,]   10   14   18

\({\color{dodgerblue}{\textbf{Indexing}}}\)

Extract a specific item

eg_matrix[1,2]
## [1] 2

Extract a subset

eg_matrix[1:2,2:3]
##    2011 2013
## MA    2    3
## NY    5    6

Extract a specific subset

eg_matrix[c(1,3),2:3]
##    2011 2013
## MA    2    3
## CO    8    9

Extract a row and all columns

eg_matrix[1,]
## 2010 2011 2013 
##    1    2    3

Extract a column and all rows

eg_matrix[,1]
## MA NY CO 
##  1  4  7

\({\color{dodgerblue}{\textbf{Editing}}}\)

Adding a column

cbind(eg_matrix, eg_vector)
##    2010 2011 2013 eg_vector
## MA    1    2    3       100
## NY    4    5    6         2
## CO    7    8    9         3

Adding a row

rbind(eg_matrix, eg_vector)
##           2010 2011 2013
## MA           1    2    3
## NY           4    5    6
## CO           7    8    9
## eg_vector  100    2    3

Replacing a specific value

eg_matrix[1,2] <- 100
eg_matrix
##    2010 2011 2013
## MA    1  100    3
## NY    4    5    6
## CO    7    8    9

\({\color{darkblue}{\textbf{Dataframes}}}\)


  • Two-dimensional objects
  • Within a column all elements must have the same data type, but different columns can have different data types

\({\color{dodgerblue}{\textbf{Creating}}}\)

name <- c("Mercury", "Venus", "Earth", "Mars")
type <- c("Terrestrial", "Terrestrial", "Terrestrial", "Terrestrial")
diameter <- c(0.382, 0.949, 1, 0.532)
rotation <- c(58.64, -243.02, 1, 1.03)
rings <- c(FALSE, FALSE, FALSE, FALSE)

planets_df <- data.frame(name, type, diameter, rotation, rings)

planets_df
name type diameter rotation rings
Mercury Terrestrial 0.382 58.64 FALSE
Venus Terrestrial 0.949 -243.02 FALSE
Earth Terrestrial 1.000 1.00 FALSE
Mars Terrestrial 0.532 1.03 FALSE

\({\color{dodgerblue}{\textbf{Operations}}}\)

Order Ascending

planets_df[order(planets_df$diameter),"name"]
## [1] Mercury Mars    Venus   Earth  
## Levels: Earth Mars Mercury Venus

Order Descending

planets_df[order(planets_df$diameter,decreasing=TRUE),"name"]
## [1] Earth   Venus   Mars    Mercury
## Levels: Earth Mars Mercury Venus

\({\color{dodgerblue}{\textbf{Indexing}}}\)

Extract a specific item

planets_df[2,1]
## [1] Venus
## Levels: Earth Mars Mercury Venus

Extract a subset

planets_df[2:3,1:2]
name type
2 Venus Terrestrial
3 Earth Terrestrial

Extract a specific subset

planets_df[c(1,3),1:2]
name type
1 Mercury Terrestrial
3 Earth Terrestrial

Extract all rows of a column

planets_df[,1]
## [1] Mercury Venus   Earth   Mars   
## Levels: Earth Mars Mercury Venus

Extract all columns of a row

planets_df[1,]
name type diameter rotation rings
Mercury Terrestrial 0.382 58.64 FALSE

Extract column by name

planets_df[,"diameter"]
## [1] 0.382 0.949 1.000 0.532

Extract column by name (using $)

planets_df$rings
## [1] FALSE FALSE FALSE FALSE

Extract based on values

planets_df[planets_df$rotation<0,]
name type diameter rotation rings
2 Venus Terrestrial 0.949 -243.02 FALSE

\({\color{darkblue}{\textbf{Lists}}}\)


  • One-dimensional array
  • The elements in a list do not have to be related in any way

\({\color{dodgerblue}{\textbf{Creating}}}\)

Basic List

list(A_vector, B_vector)
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] 4 5 6

Named List

list("A" = A_vector,
     "B" = B_vector)
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6

Named List (adding names after creation)

eg_list <- list(A_vector, B_vector)
names(eg_list) <- c("A", "B")

eg_list
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6

\({\color{dodgerblue}{\textbf{Indexing}}}\)

Extract specific item

eg_list[[1]]
## [1] 1 2 3

Extract specific item by name

eg_list[["A"]]
## [1] 1 2 3

Extract specific item by name (using $)

eg_list$A
## [1] 1 2 3

Extract specific item within a specific item

eg_list[[1]][1]
## [1] 1

\({\color{dodgerblue}{\textbf{Editing}}}\)

Combining Lists

c(eg_list, eg_list)
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6
## 
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6

Adding/Changing Items

eg_list[3] <- 100
eg_list
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6
## 
## [[3]]
## [1] 100

\({\color{darkblue}{\textbf{If Statement}}}\)


Single If

if (condition) {
  expression
}

If/Else

if (condition) {
  expression_1
} else {
  expression_2
}

If and Multiple Elses

if (condition_1) {
  expression_1
} else if (condition_2) {
  expression_2
} else {
  expression_3
}

\({\color{darkblue}{\textbf{Loops}}}\)


\({\color{dodgerblue}{\textbf{While}}}\)

External Condition

while (condition) {
  expression
}

Indexed

i <- 1
while (i <= 10) {
  expression
  
  i <- i + 1
}

\({\color{dodgerblue}{\textbf{For}}}\)

Iterate over a vector (str_c() comes from the stringr package, so load it first with library(stringr))

fruit_list <- c("Apples", "Strawberries", "Bananas")

for (item in fruit_list) {
  print(str_c("I love ",item))
}
## [1] "I love Apples"
## [1] "I love Strawberries"
## [1] "I love Bananas"

Apply an operation to each element of a vector

number_list <- c(1,2,3)

for (i in seq_along(number_list)) {
  print(number_list[i] + 1)
}
## [1] 2
## [1] 3
## [1] 4

Fill in a new, preallocated vector

dummy_list <- vector("double", length(number_list))

for (i in seq_along(number_list)) {
  dummy_list[[i]] <- number_list[i] + 1
}

dummy_list
## [1] 2 3 4

Functions


Existing

install.packages("package")
library(package)
update.packages()
  • abs(): Calculate the absolute value.
  • sum(): Calculate the sum of all the values in a data structure.
  • mean(): Calculate the arithmetic mean.
  • round(): Round the values to 0 decimal places by default.

  • seq(): Generate sequences, by specifying the from, to, and by arguments.
  • rep(): Replicate elements of vectors and lists.
  • sort(): Sort a vector in ascending order. Works on numerics, but also on character strings and logicals.
  • rev(): Reverse the elements of a data structure for which reversal is defined.
  • str(): Display the structure of any R object.
  • append(): Merge vectors or lists.
  • is.*(): Check for the class of an R object.
  • as.*(): Convert an R object from one class to another.
  • unlist(): Flatten (possibly nested) lists to produce a vector.


Creating

function_name <- function (argument_1, argument_2) {
  expression
}

function_name <- function (argument_1, argument_2 = default_value) {
  expression
}

function_name <- function(argument_1, ...) {
  expression using argument_1 and any number of other arguments 
}

function_name <- function(argument) {
  if (condition) {
    stop("Error message", call. = FALSE)
  }
  expression 
}

Applying

  • lapply returns a list
  • sapply works like lapply but tries to simplify the result to a vector or matrix
  • vapply is like sapply, but the type and length of each result must be specified as “data type(length)”, e.g. numeric(4) when every call returns four numbers
lapply(vector, existing_function)
sapply(vector, existing_function)
vapply(vector, existing_function, numeric(4))

lapply(vector, existing_function, other_arguments)

lapply(vector, function(argument){new_function expression})

Regular Expressions


Apply

  • grepl() returns TRUE when a pattern is found in the corresponding character string.
  • grep() returns a vector of indices of the character strings that contain the pattern.
grep(regular_expression, search_location)
  • sub() replaces the first match
  • gsub() replaces all matches
sub(regular_expression, replace_value, search_location)
gsub(regular_expression, replace_value, search_location)

Dates and Times


Now

Sys.Date()
## [1] "2019-04-23"
Sys.time()
## [1] "2019-04-23 13:36:37 EDT"

Setting

  • %Y: 4-digit year (1982)
  • %y: 2-digit year (82)
  • %m: 2-digit month (01)
  • %d: 2-digit day of the month (13)
  • %A: weekday (Wednesday)
  • %a: abbreviated weekday (Wed)
  • %B: month (January)
  • %b: abbreviated month (Jan)
as.Date("1982-01-13")
## [1] "1982-01-13"
as.Date("Jan-13-82", format = "%b-%d-%y")
## [1] "1982-01-13"
as.Date("13 January, 1982", format = "%d %B, %Y")
## [1] "1982-01-13"
  • %H: hours as a decimal number (00-23)
  • %I: hours as a decimal number (01-12)
  • %M: minutes as a decimal number
  • %S: seconds as a decimal number
  • %T: shorthand notation for the typical format %H:%M:%S
  • %p: AM/PM indicator
as.POSIXct("2012-5-12 14:23:08")
## [1] "2012-05-12 14:23:08 EDT"
as.POSIXct("May 12, '12 hours:14 minutes:23 seconds:08", 
           format = "%B %d, '%y hours:%H minutes:%M seconds:%S")
## [1] "2012-05-12 14:23:08 EDT"

Reformatting

Sys.Date()
## [1] "2019-04-23"
format(Sys.Date(), format = "%d %B, %Y")
## [1] "23 April, 2019"
format(Sys.Date(), format = "Today is a %A!")
## [1] "Today is a Tuesday!"